from matplotlib import pyplot as plt
import colorcet
import toml
plot
Visualization of big data is hard.
For a big raster image, plotting every pixel consumes too much time and memory, and common picture formats (e.g. png) do not support very large images. One common solution is to resample to a smaller image and plot that, which is the default behavior of common plotting packages (e.g. matplotlib). But this reduces the resolution and prevents local features from being shown.
For a big point cloud image, besides the time and memory consumption, the overlapping of rendered points is also a problem.
Here we provide plotting functions based on Holoviews and Datashader to fix these problems. The point cloud is resampled to a small raster image and then plotted on the screen. The pixel size of the raster image exactly matches the resolution of the screen to maximize accuracy. The plot is interactive and the resampling is dynamic, which means that when you zoom in, a more precise raster image is automatically generated and plotted.
Note that the plotting functions here make use of dynamic updates, which require a running Jupyter server. When viewed statically (as on this documentation website), the plots will not update fully when you zoom.
import holoviews as hv
import datashader as ds
import spatialpandas
import zarr
import numpy as np
from holoviews import opts
hv.extension('bokeh')
raster
raster (p:numpy.ndarray, kdims:list, pdim:str, bounds:tuple=None, prange:tuple=None, aggregator=<class 'datashader.reductions.first'>, use_hover:bool=True)
Interactive visualization of a raster image.
| Name | Type | Default | Details |
|---|---|---|---|
| p | ndarray | | data to be plotted, shape (n,m) |
| kdims | list | | name of coordinates (x, y) |
| pdim | str | | name of data to be plotted |
| bounds | tuple | None | extent of the raster, (x0, y0, x1, y1); (0, 0, m, n) by default |
| prange | tuple | None | range of data to be plotted, it is interactively adjusted by default |
| aggregator | type | first | aggregator for data rasterization |
| use_hover | bool | True | use hover to show data |
rslc_ = '../Tutorials/CLI/load_data/rslc.zarr/'
rslc = zarr.open(rslc_,'r')[...,0:3]
raster_plot = raster(np.angle(rslc[...,2]*rslc[...,0].conj()),kdims=['Range','Azimuth'],pdim='Phase',prange=(-np.pi,np.pi))
raster_plot.opts(opts.Image(cmap='colorwheel',width=600, height=400, colorbar=True, xlabel='Range', ylabel='Azimuth',invert_yaxis=True))
# plt.imshow(np.angle(rslc[...,2]*rslc[...,0].conj()))
raster_stack
raster_stack (p:numpy.ndarray, kdims:list, tdim:str, pdim:str, bounds:tuple=None, t:list=None, prange:tuple=None, aggregator=<class 'datashader.reductions.first'>, use_hover:bool=True)
Interactive visualization of a stack of raster images.
| Name | Type | Default | Details |
|---|---|---|---|
| p | ndarray | | data to be plotted, shape (n,m,l) |
| kdims | list | | name of coordinates (x, y) |
| tdim | str | | name of coordinates (t,) |
| pdim | str | | name of data to be plotted |
| bounds | tuple | None | extent of the raster, (x0, y0, x1, y1); (0, 0, m, n) by default |
| t | list | None | t coordinates of the plot, a list of strings of length l; [‘0’,‘1’,…] by default |
| prange | tuple | None | range of data to be plotted, it is interactively adjusted by default |
| aggregator | type | first | aggregator for data rasterization |
| use_hover | bool | True | use hover to show data |
rslc_ = '../Tutorials/CLI/load_data/rslc.zarr/'
rslc = zarr.open(rslc_,'r')[:]
dates_str = toml.load('../Tutorials/CLI/load_data/meta.toml')['dates']
raster_stack_plot = raster_stack(np.angle(rslc*rslc[...,[0]].conj()),
kdims=['Range','Azimuth'], tdim = 'Date',pdim='Phase',
t=dates_str,
prange=(-np.pi,np.pi))
hv.output(widget_location='bottom',holomap='scrubber')
raster_stack_plot.opts(opts.Image(cmap='colorwheel',width=600, height=400, colorbar=True, xlabel='Range', ylabel='Azimuth',invert_yaxis=True))
Dask arrays are currently not supported. They will be supported in the future.
# import dask
# from dask import array as da
# from dask.distributed import Client, LocalCluster
# cluster = LocalCluster(); client = Client(cluster)
# rslc = da.from_zarr(rslc_)
# raster_stack_plot = raster_stack(da.angle(rslc*rslc[...,[0]].conj()),
# kdims=['Range','Azimuth'], tdim = 'Date',pdim='Phase',
# t=dates_str,
# prange=(-np.pi,np.pi))
# raster_stack_plot.opts(opts.Image(cmap='colorwheel',width=600, height=400, colorbar=True, xlabel='Range', ylabel='Azimuth',invert_yaxis=True))
# cluster.close(); client.close()
points
points (data:Union[pandas.core.frame.DataFrame,dask.dataframe.core.DataFrame], kdims:list, pdim:str, prange:tuple=None, aggregator=<class 'datashader.reductions.first'>, use_hover:bool=True, vdims:list=None, google_earth:bool=False)
Interactive visualization of a point cloud image.
| Name | Type | Default | Details |
|---|---|---|---|
| data | Union | | dataset to be plotted |
| kdims | list | | column names of the Mercator coordinates in the dataframe |
| pdim | str | | column name of data to be plotted in the dataframe |
| prange | tuple | None | range of data to be plotted, it is interactively adjusted by default |
| aggregator | type | first | aggregator for data rasterization |
| use_hover | bool | True | use hover to show data |
| vdims | list | None | column names of data shown on hover in addition to kdims and pdim, which are always shown |
| google_earth | bool | False | whether to use Google Earth imagery as the background |
Here we plot interferogram after DS processing:
ds_ph_ = '../Tutorials/CLI/ds_processing/ds_ph.zarr/'
ds_e_ = '../Tutorials/CLI/ds_processing/ds_e.zarr/'
ds_n_ = '../Tutorials/CLI/ds_processing/ds_n.zarr/'
ds_lon_ = '../Tutorials/CLI/ds_processing/ds_lon.zarr/'
ds_lat_ = '../Tutorials/CLI/ds_processing/ds_lat.zarr/'
ds_ph = zarr.open(ds_ph_,'r')[:]
ds_e = zarr.open(ds_e_,'r')[:]
ds_n = zarr.open(ds_n_,'r')[:]
ds_lon = zarr.open(ds_lon_,'r')[:]
ds_lat = zarr.open(ds_lat_,'r')[:]
# data = pd.DataFrame({'e':ds_e,'n':ds_n,'phase':np.angle(ds_ph[:,10]),'lon':ds_lon,'lat':ds_lat})
coordinates = spatialpandas.geometry.PointArray((ds_e, ds_n))
data = spatialpandas.GeoDataFrame({'geometry':coordinates,'phase':np.angle(ds_ph[:,10]),'lon':ds_lon,'lat':ds_lat})
plot = points(data,kdims=['e','n'],pdim='phase',prange=(-np.pi,np.pi),vdims=['lon','lat'],google_earth=True)
Set some options on the plot:
plot.opts(opts.Image(cmap='colorwheel',width=600, height=400, colorbar=True, xlabel='Longitude', ylabel='Latitude'),
opts.Points(marker='o',size=10,tools=['hover']))
Dask dataframes are also supported:
import dask
from dask import array as da
from dask import dataframe as dd
from dask.distributed import Client, LocalCluster
cluster = LocalCluster(); client = Client(cluster)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
ds_ph = da.from_zarr(ds_ph_)
ds_e = da.from_zarr(ds_e_)
ds_n = da.from_zarr(ds_n_)
ds_lon = da.from_zarr(ds_lon_)
ds_lat = da.from_zarr(ds_lat_)
ds_data = da.stack([ds_e,ds_n,ds_lon,ds_lat,da.angle(ds_ph[:,10])]).T
ds_df = dd.from_dask_array(ds_data,columns=['e','n','lon','lat','phase'])
ds_df = ds_df.persist() # only do it if the memory is enough
plot = points(ds_df,kdims=['e','n'],pdim='phase',prange=(-np.pi,np.pi),vdims=['lon','lat'],google_earth=True)
plot.opts(opts.Image(cmap='colorwheel',width=600, height=400, colorbar=True, xlabel='Longitude', ylabel='Latitude'),
opts.Points(marker='o',size=10,tools=['hover']))
cluster.close(); client.close()
points_stack
points_stack (data:Union[pandas.core.frame.DataFrame,dask.dataframe.core.DataFrame], kdims:list, pdata:Union[pandas.core.frame.DataFrame,dask.dataframe.core.DataFrame], pdim:str, prange:tuple=None, aggregator=<class 'datashader.reductions.first'>, use_hover:bool=True, vdims:list=None, google_earth:bool=False)
Interactive visualization of a stack of point cloud images.
| Name | Type | Default | Details |
|---|---|---|---|
| data | Union | | common data in all plots |
| kdims | list | | column names of the Mercator coordinates in the dataframe |
| pdata | Union | | data to be plotted as color |
| pdim | str | | label of pdata |
| prange | tuple | None | range of pdata, it is interactively adjusted by default |
| aggregator | type | first | aggregator for data rasterization |
| use_hover | bool | True | use hover to show other columns |
| vdims | list | None | column names of data shown on hover in addition to kdims, which are always shown |
| google_earth | bool | False | whether to use Google Earth imagery as the background |
ds_ph_ = '../Tutorials/CLI/ds_processing/ds_ph.zarr/'
ds_e_ = '../Tutorials/CLI/ds_processing/ds_e.zarr/'
ds_n_ = '../Tutorials/CLI/ds_processing/ds_n.zarr/'
ds_lon_ = '../Tutorials/CLI/ds_processing/ds_lon.zarr/'
ds_lat_ = '../Tutorials/CLI/ds_processing/ds_lat.zarr/'
ds_ph = zarr.open(ds_ph_,'r')[:]
ds_e = zarr.open(ds_e_,'r')[:]
ds_n = zarr.open(ds_n_,'r')[:]
ds_lon = zarr.open(ds_lon_,'r')[:]
ds_lat = zarr.open(ds_lat_,'r')[:]
meta_file = '../CLI/raw/meta.toml'
with open(meta_file,'r') as f:
    meta_data = toml.load(f)
dates = meta_data['dates']
# data = pd.DataFrame({'e':ds_e,'n':ds_n,'lon':ds_lon,'lat':ds_lat,'idx':np.arange(len(ds_e))})
coordinates = spatialpandas.geometry.PointArray((ds_e, ds_n))
data = spatialpandas.GeoDataFrame({'geometry':coordinates,'lon':ds_lon,'lat':ds_lat,'idx':np.arange(len(ds_e))})
pdata = pd.DataFrame(np.angle(ds_ph),columns=dates)
plot_stack = points_stack(data,['e','n'],pdata,'phase',prange=(-np.pi,np.pi),vdims=['lon','lat','idx'],google_earth=True)
hv.output(widget_location='bottom',holomap='scrubber')
plot_stack.opts(opts.Image(cmap='colorwheel',width=600, height=400, colorbar=True, xlabel='Longitude', ylabel='Latitude'),
opts.Points(marker='o',size=10))
Plotting selected images simultaneously is also supported:
plot_stack.layout()[dates[0:4]].cols(2)
Dask is also supported:
cluster = LocalCluster(); client = Client(cluster)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
Error. nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)
ds_ph = da.from_zarr(ds_ph_)
ds_e = da.from_zarr(ds_e_)
ds_n = da.from_zarr(ds_n_)
ds_lon = da.from_zarr(ds_lon_)
ds_lat = da.from_zarr(ds_lat_)
meta_file = '../CLI/raw/meta.toml'
with open(meta_file,'r') as f:
    meta_data = toml.load(f)
dates = meta_data['dates']
data = da.stack([ds_e,ds_n,ds_lon,ds_lat]).T
data = dd.from_dask_array(data,columns=['e','n','lon','lat'])
pdata = dd.from_dask_array(da.angle(ds_ph),columns=dates)
data = data.persist(); pdata=pdata.persist() # only do it if the memory is enough
plot_stack = points_stack(data,kdims=['e','n'],pdata=pdata,pdim='phase',prange=(-np.pi,np.pi),vdims=['lon','lat'],google_earth=True)
hv.output(widget_location='bottom',holomap='scrubber')
plot_stack.opts(opts.Image(cmap='colorwheel',width=600, height=400, colorbar=True, xlabel='Longitude', ylabel='Latitude'),
opts.Points(marker='o',size=10))
cluster.close(); client.close()